{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import sqlite3\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "conn = sqlite3.connect(\"/data/movie-lens.db\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movieId                               title  \\\n",
       "0        1                    Toy Story (1995)   \n",
       "1        2                      Jumanji (1995)   \n",
       "2        3             Grumpier Old Men (1995)   \n",
       "3        4            Waiting to Exhale (1995)   \n",
       "4        5  Father of the Bride Part II (1995)   \n",
       "\n",
       "                                        genres  \n",
       "0  Adventure|Animation|Children|Comedy|Fantasy  \n",
       "1                   Adventure|Children|Fantasy  \n",
       "2                               Comedy|Romance  \n",
       "3                         Comedy|Drama|Romance  \n",
       "4                                       Comedy  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies = pd.read_sql(\"select * from movies\", conn)\n",
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1260759144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1029</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1260759179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1061</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1260759182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1129</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1260759185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1172</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1260759205</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating   timestamp\n",
       "0       1       31     2.5  1260759144\n",
       "1       1     1029     3.0  1260759179\n",
       "2       1     1061     3.0  1260759182\n",
       "3       1     1129     2.0  1260759185\n",
       "4       1     1172     4.0  1260759205"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings = pd.read_sql(\"select * from ratings\", conn)\n",
    "ratings.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEcCAYAAAAP5CkrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XuUXXV99/H3hxDuVYJEhCQQtNES\nrQaMgVWs4KWQgIj0wRZsJfqgsRWW8LSPNVL7gBdcaa3aUpWKkhKwGvGCRAliQMHlBUiQQLgnQiRD\nEALhDgXBz/PH/g0cJ2dmzkz2uQz5vNY6a/b57tt375lzvrP3/u3flm0iIiLqsFW3E4iIiOePFJWI\niKhNikpERNQmRSUiImqTohIREbVJUYmIiNqkqET0CEnvlvTTGpd3lKR1kh6VtG9dy90ckk6R9JVu\n5xHtk6ISbSdpraQnypdb/2uPbuc1lkm6XNJ7h5nsX4ETbe9k+9pO5NVI0sGS+hpjtj9le7i8YwxL\nUYlOOaJ8ufW/1g+cQNLW3UjseWwv4MbRzChp3DDjJSnfH7GJ/FFE10iaKsmSjpd0J/CjEj9A0s8l\nPSjpOkkHN8yzt6QrJD0iaZmkz0v6ahm3yX/G5SjpLWV4K0nzJf1K0v2Szpe0y4Bc5kq6U9J9kv6x\nYTnjyqmbX5V1XyNpiqQvSPrMgHV+T9LJg2yzJX1Q0u1lHZ8e7MtZ0p9IWi7pofLzT0r8dOBPgc+X\no77PD5hvW0mPAuOA6yT9qsT3KUc4D0q6UdLbGuY5R9KZkpZKegx4Y5N8Lpd0uqSfAY8DL5X0Hkk3\nl31yu6T3l2l3BC4G9mg8OpV0WsPva7h9vr2kRZIeKOv4h4G/3+hBtvPKq60vYC3wlibxqYCBc4Ed\nge2BScD9wGFU//T8WXk/sczzC+CzwLbAG4BHgK+WcQcDfYOtGzgZuBKYXOb/EvD1Abl8ueTxGuBJ\nYJ8y/kPAKuAVgMr4FwGzgPXAVmW6Xam+cHcbZF8Y+DGwC7AncBvw3jLu3cBPy/AuwAPAu4CtgWPL\n+xeV8Zf3zzfEfjfwh2V4PLAGOAXYBnhT2XevKOPPAR4CDiz7fbsmy7scuBN4ZclpPHA48LKyTw4q\n277fEL+P0xp+X8Pt8wXAFcCE8ju7fuDy8uq9V45UolO+W/5DflDSdweMO832Y7afAP4aWGp7qe3f\n2V4GrAAOk7Qn8Drgn2w/afsnwPdGkMP7gX+03Wf7SaovuKMHnHb7mO0nbF8HXEf1RQfwXuCjtm91\n5Trb99u+murL+M1lumOAy23fM0Qe/2x7o+07gX+jKhgDHQ6stn2e7adtfx24BThiBNvb6ABgJ2CB\n7ads/wj4/oB1X2j7Z2W//88gyznH9o0lp9/avsj2r8o+uQL4IdVR1EgMts//AviU7Qds9wFnjHC5\n0QUpKtEpb7e9c3m9fcC4dQ3DewHvaChADwKvB3YH9gAesP1Yw/S/HkEOewEXNCz3ZuAZYLeGaX7T\nMPw41RcxwBTgV4MsdxFVMaT8PG+YPBq399dU2zXQHmy6bb+mOpIbjT2AdbZ/N8Ty1jG835tG0hxJ\nV0raWPbpYVRHayMx2D7fY8D6WskvuixFJXpBY1fZ64DzGgrQzrZ3tL0AuBuYUM7X99uzYfgxYIf+\nN+Vi88QBy54zYNnb2b6rhRzXUZ3maearwJGSXgPsAww8EhtoyoD8N2m0UGJ7DYjtCfTnOtLuxdcD\nUwZcv2lcXqvLfHYaSdsC36ZqZbab7Z2BpVSnwkaT40B3U5326jdlsAmjd6SoRK/5KnCEpEPLxfHt\nygX4ybZ/TXUq7GOStpH0en7/dNBtwHaSDpc0Hvgo1bWTfv8JnC5pLwBJEyUd2WJeXwE+IWmaKq+W\n9CKAcmpmOdURyrfLabyhfEjSBElTgJOAbzSZZinwcknvlLS1pL8EplOdsgK4B3hpi7kDXEVVdP9B\n0nhVjR+OABaPYBkDbUO1fzcAT0uaAxzSMP4e4EWSXjjK5Z8PfKTsq0nAiZuRa3RIikr0FNvrgCOp\nLihvoDpC+BDP/a2+E9gf2AicSnWRv3/eh4APUBWAu6i+RBtbC/07sAT4oaRHqC7a799iap+l+pL7\nIfAwcDbVxeV+i4A/ZvhTXwAXAtcAK4GLyrJ+j+37gbcCf0/VUOEfgLfavq9hW44uLaOGvdZg+yng\nbcAc4D7gi8Bxtm9pId/BlvkI8EGq/fIA1e9mScP4W4CvA7eXU44jvTfp41S/vzuAS4FvUV3Ijx4m\nOw/pirFL0mlULZz+erhp25zHG6iOsqYOuG4xcDoD02yv6VhyzxOS/hY4xvZB3c4lBpcjlYjNVE61\nnQR8ZaiCEiMjaXdJB6q6v+gVVEdtF3Q7rxhaikrEZpC0D/AgVeu0f+tyOs8321DdS/QI1Y2xF1Kd\ntoseltNfERFRm7YdqZRWO1er6mbjRkkfK/G9JV0labWkb0japsS3Le/XlPFTG5b1kRK/VdKhDfHZ\nJbZG0vx2bUtERLSmbUcqkgTsaPvRcs75p1Tnnf8O+I7txZL+E7jO9pmSPgC82vbfSDoGOMr2X0qa\nTtWCZBbVzVCXAi8vq7mNqhuP/iadx9q+aai8dt11V0+dOrX27Y2IeD675ppr7rM9cbjp2tYrrKtq\n9Wh5O768TNXn0DtLfBFVVxlnUjUjPa3Ev0XVWZ5KfHHpVuMOSWuoCgzAGtu3A0haXKYdsqhMnTqV\nFStWbO7mRURsUSS11HtFWy/Ul5vXVgL3Asuourl40PbTZZI+nusmYhKlG4Yy/iGqDvuejQ+YZ7B4\nszzmSVohacWGDRvq2LSIiGiirUXF9jO2Z1B1tTCLqguLTSYrPzXIuJHGm+Vxlu2ZtmdOnDjs0VtE\nRIxSR5oU236QqtvsA4CdG3qFncxz/R71Ufr2KeNfSHXX9LPxAfMMFo+IiC5pZ+uviZJ2LsPbA2+h\n6hX2x8DRZbK5VG3PoereYW4ZPhr4UbkuswQ4prQO2xuYBlxNdWF+WmlNtg1Vl+PPdhERERGd187H\nt+4OLCo9xW4FnG/7+5JuAhZL+iRwLc/1e3Q2cF65EL+Rqkhg+0ZJ51NdgH8aOMH2MwCSTgQuoXrC\n3ULbo3p0akRE1GOLu/lx5syZTuuviIiRkXSN7ZnDTZduWiIiojYpKhERUZsUlYiIqE07L9RHRLTF\n1PkXbfYy1i44vIZMYqAcqURERG1SVCIiojYpKhERUZsUlYiIqE2KSkRE1CZFJSIiapOiEhERtUlR\niYiI2qSoREREbVJUIiKiNikqERFRmxSViIioTYpKRETUJkUlIiJqk6ISERG1SVGJiIjapKhERERt\nUlQiIqI2KSoREVGbFJWIiKhNikpERNQmRSUiImqTohIREbVpW1GRNEXSjyXdLOlGSSeV+GmS7pK0\nsrwOa5jnI5LWSLpV0qEN8dkltkbS/Ib43pKukrRa0jckbdOu7YmIiOG180jlaeDvbe8DHACcIGl6\nGfc52zPKaylAGXcM8EpgNvBFSeMkjQO+AMwBpgPHNiznn8uypgEPAMe3cXsiImIYbSsqtu+2/csy\n/AhwMzBpiFmOBBbbftL2HcAaYFZ5rbF9u+2ngMXAkZIEvAn4Vpl/EfD29mxNRES0oiPXVCRNBfYF\nriqhEyVdL2mhpAklNglY1zBbX4kNFn8R8KDtpwfEm61/nqQVklZs2LChhi2KiIhm2l5UJO0EfBs4\n2fbDwJnAy4AZwN3AZ/onbTK7RxHfNGifZXum7ZkTJ04c4RZERESrtm7nwiWNpyoo/237OwC272kY\n/2Xg++VtHzClYfbJwPoy3Cx+H7CzpK3L0Urj9BER0QXtbP0l4GzgZtufbYjv3jDZUcANZXgJcIyk\nbSXtDUwDrgaWA9NKS69tqC7mL7Ft4MfA0WX+ucCF7dqeiIgYXjuPVA4E3gWskrSyxE6har01g+pU\n1Vrg/QC2b5R0PnATVcuxE2w/AyDpROASYByw0PaNZXkfBhZL+iRwLVURi4iILmlbUbH9U5pf91g6\nxDynA6c3iS9tNp/t26lah0VERA/IHfUREVGbFJWIiKhNikpERNQmRSUiImqTohIREbVJUYmIiNqk\nqERERG1SVCIiojYpKhERUZsUlYiIqE2KSkRE1CZFJSIiapOiEhERtUlRiYiI2qSoREREbVJUIiKi\nNikqERFRmxSViIioTYpKRETUJkUlIiJqk6ISERG1SVGJiIjapKhERERtUlQiIqI2KSoREVGbFJWI\niKhNikpERNSmbUVF0hRJP5Z0s6QbJZ1U4rtIWiZpdfk5ocQl6QxJayRdL2m/hmXNLdOvljS3If5a\nSavKPGdIUru2JyIihtfOI5Wngb+3vQ9wAHCCpOnAfOAy29OAy8p7gDnAtPKaB5wJVRECTgX2B2YB\np/YXojLNvIb5ZrdxeyIiYhhbt2vBtu8G7i7Dj0i6GZgEHAkcXCZbBFwOfLjEz7Vt4EpJO0vavUy7\nzPZGAEnLgNmSLgdeYPsXJX4u8Hbg4nZtU0Q3TZ1/0WYvY+2Cw2vIJGJwHbmmImkqsC9wFbBbKTj9\nhefFZbJJwLqG2fpKbKh4X5N4s/XPk7RC0ooNGzZs7uZERMQg2l5UJO0EfBs42fbDQ03aJOZRxDcN\n2mfZnml75sSJE4dLOSIiRqmtRUXSeKqC8t+2v1PC95TTWpSf95Z4HzClYfbJwPph4pObxCMiokva\n2fpLwNnAzbY/2zBqCdDfgmsucGFD/LjSCuwA4KFyeuwS4BBJE8oF+kOAS8q4RyQdUNZ1XMOyIiKi\nC9p2oR44EHgXsErSyhI7BVgAnC/peOBO4B1l3FLgMGAN8DjwHgDbGyV9Alhepvt4/0V74G+Bc4Dt\nqS7Q5yJ9REQXtbP1109pft0D4M1NpjdwwiDLWggsbBJfAbxqM9KMiIga5Y76iIioTYpKRETUJkUl\nIiJqk6ISERG1SVGJiIjatFRUJKWFVUREDKvVI5X/lHS1pA9I2rmtGUVExJjVUlGx/Xrgr6i6S1kh\n6WuS/qytmUVExJjT8jUV26uBj1J1U38QcIakWyT9ebuSi4iIsaXVayqvlvQ54GbgTcAR5eFbbwI+\n18b8IiJiDGm1m5bPA18GTrH9RH/Q9npJH21LZhERMea0WlQOA56w/QyApK2A7Ww/bvu8tmUXERFj\nSqvXVC6l6gm43w4lFhER8axWi8p2th/tf1OGd2hPShERMVa1WlQek7Rf/xtJrwWeGGL6iIjYArV6\nTeVk4JuS+h/Xuzvwl+1JKSIixqqWiort5ZL+CHgF1YO3brH927ZmFhERY85Invz4OmBqmWdfSdg+\nty1ZRUTEmNRSUZF0HvAyYCXwTAkbSFGJiIhntXqkMhOYXp4jHxER0VSrrb9uAF7SzkQiImLsa/VI\nZVfgJklXA0/2B22/rS1ZRUTEmNRqUTmtnUlERMTzQ6tNiq+QtBcwzfalknYAxrU3tYiIGGta7fr+\nfcC3gC+V0CTgu+1KKiIixqZWL9SfABwIPAzPPrDrxe1KKiIixqZWi8qTtp/qfyNpa6r7VCIiIp7V\nalG5QtIpwPbl2fTfBL431AySFkq6V9INDbHTJN0laWV5HdYw7iOS1ki6VdKhDfHZJbZG0vyG+N6S\nrpK0WtI3JG3T6kZHRER7tFpU5gMbgFXA+4GlVM+rH8o5wOwm8c/ZnlFeSwEkTQeOAV5Z5vmipHGS\nxgFfAOYA04Fjy7QA/1yWNQ14ADi+xW2JiIg2abX11++oHif85VYXbPsnkqa2OPmRwGLbTwJ3SFoD\nzCrj1ti+HUDSYuBISTcDbwLeWaZZRNXs+cxW84uIeD6YOv+izV7G2gWH15BJpdXWX3dIun3ga5Tr\nPFHS9eX02IQSmwSsa5imr8QGi78IeND20wPig+U/T9IKSSs2bNgwyrQjImI4rZ7+mknVS/HrgD8F\nzgC+Oor1nUnVMeUM4G7gMyWuJtN6FPGmbJ9le6btmRMnThxZxhER0bKWiort+xted9n+N6rTTyNi\n+x7bzzScTus/xdUHTGmYdDKwfoj4fcDOpRVaYzwiIrqo1a7v92t4uxXVkcsfjHRlkna3fXd5exRV\nR5UAS4CvSfossAcwDbia6ohkmqS9gbuoLua/07Yl/Rg4GlgMzAUuHGk+ERFRr1b7/vpMw/DTwFrg\nL4aaQdLXgYOBXSX1AacCB0uaQXWqai1VSzJs3yjpfOCmsvwTbD9TlnMicAlVtzALbd9YVvFhYLGk\nTwLXAme3uC0REdEmrbb+euNIF2z72CbhQb/4bZ8OnN4kvpSqCfPA+O08d/osIiJ6QKunv/5uqPG2\nP1tPOhERMZaN5MmPr6O69gFwBPATfr+5b0REbOFG8pCu/Ww/AlV3K8A3bb+3XYlFRMTY0+p9KnsC\nTzW8fwqYWns2ERExprV6pHIecLWkC6habh0FnNu2rCIiYkxqtfXX6ZIuprqbHuA9tq9tX1oRETEW\ntXr6C2AH4GHb/w70lRsSIyIintVqh5KnUt1s+JESGs/o+v6KiIjnsVaPVI4C3gY8BmB7PaPopiUi\nIp7fWi0qT9k2pSdgSTu2L6WIiBirWi0q50v6ElXPwO8DLmUED+yKiIgtQ6utv/61PJv+YeAVwP+z\nvaytmUVExJgzbFEpz4m/xPZbgBSSiIgY1LCnv0oX9I9LemEH8omIiDGs1Tvq/wdYJWkZpQUYgO0P\ntiWriIgYk1otKheVV0RExKCGLCqS9rR9p+1FnUooIiLGruGuqXy3f0DSt9ucS0REjHHDFRU1DL+0\nnYlERMTYN1xR8SDDERERmxjuQv1rJD1MdcSyfRmmvLftF7Q1u4iIGFOGLCq2x3UqkYiIGPtG8jyV\niIiIIaWoREREbVJUIiKiNikqERFRm1a7aYmIAGDq/M3vsWntgsNryCR6UduOVCQtlHSvpBsaYrtI\nWiZpdfk5ocQl6QxJayRdL2m/hnnmlulXS5rbEH+tpFVlnjMkiYiI6Kp2nv46B5g9IDYfuMz2NOCy\n8h5gDjCtvOYBZ0JVhIBTgf2BWcCp/YWoTDOvYb6B64qIiA5rW1Gx/RNg44DwkUB/55SLgLc3xM91\n5UqqxxbvDhwKLLO90fYDVA8Jm13GvcD2L2wbOLdhWRER0SWdvlC/m+27AcrPF5f4JGBdw3R9JTZU\nvK9JvClJ8yStkLRiw4YNm70RERHRXK+0/mp2PcSjiDdl+yzbM23PnDhx4ihTjIiI4XS6qNxTTl1R\nft5b4n3AlIbpJgPrh4lPbhKPiIgu6nRRWQL0t+CaC1zYED+utAI7AHionB67BDhE0oRygf4Q4JIy\n7hFJB5RWX8c1LCsiIrqkbfepSPo6cDCwq6Q+qlZcC4DzJR0P3Am8o0y+FDgMWAM8DrwHwPZGSZ8A\nlpfpPm67/+L/31K1MNseuLi8IiKii9pWVGwfO8ioNzeZ1sAJgyxnIbCwSXwF8KrNyTEiIurVKxfq\nIyLieSBFJSIiapOiEhERtUlRiYiI2qSoREREbVJUIiKiNikqERFRmxSViIioTYpKRETUJkUlIiJq\nk2fUR0SM0tT5F232MtYuOLyGTHpHjlQiIqI2KSoREVGbnP6Knre5pxieb6cXInpZjlQiIqI2KSoR\nEVGbFJWIiKhNikpERNQmRSUiImqTohIREbVJUYmIiNqkqERERG1SVCIiojYpKhERUZsUlYiIqE2K\nSkRE1CZFJSIiatOVoiJpraRVklZKWlFiu0haJml1+TmhxCXpDElrJF0vab+G5cwt06+WNLcb2xIR\nEc/p5pHKG23PsD2zvJ8PXGZ7GnBZeQ8wB5hWXvOAM6EqQsCpwP7ALODU/kIUERHd0Uunv44EFpXh\nRcDbG+LnunIlsLOk3YFDgWW2N9p+AFgGzO500hER8ZxuFRUDP5R0jaR5Jbab7bsBys8Xl/gkYF3D\nvH0lNlh8E5LmSVohacWGDRtq3IyIiGjUrSc/Hmh7vaQXA8sk3TLEtGoS8xDxTYP2WcBZADNnzmw6\nTUREbL6uHKnYXl9+3gtcQHVN5J5yWovy894yeR8wpWH2ycD6IeIREdElHS8qknaU9Af9w8AhwA3A\nEqC/Bddc4MIyvAQ4rrQCOwB4qJweuwQ4RNKEcoH+kBKLiIgu6cbpr92ACyT1r/9rtn8gaTlwvqTj\ngTuBd5TplwKHAWuAx4H3ANjeKOkTwPIy3cdtb+zcZsSWZOr8izZ7GWsXHF5DJhG9reNFxfbtwGua\nxO8H3twkbuCEQZa1EFhYd44RETE6vdSkOCIixrgUlYiIqE2KSkRE1CZFJSIiapOiEhERtUlRiYiI\n2nSrm5YYxubeF5F7IiKiG3KkEhERtUlRiYiI2qSoREREbVJUIiKiNikqERFRmxSViIioTYpKRETU\nJkUlIiJqk6ISERG1SVGJiIjapKhERERt0vdXDCrPZY+IkcqRSkRE1CZFJSIiapOiEhERtck1lQFy\nHSEiYvRypBIREbVJUYmIiNqkqERERG1SVCIiojZjvqhImi3pVklrJM3vdj4REVuyMV1UJI0DvgDM\nAaYDx0qa3t2sIiK2XGO6qACzgDW2b7f9FLAYOLLLOUVEbLFku9s5jJqko4HZtt9b3r8L2N/2iQOm\nmwfMK29fAdy6GavdFbhvM+avSy/k0Qs5QG/k0Qs5QG/k0Qs5QG/k0Qs5QD157GV74nATjfWbH9Uk\ntkmVtH0WcFYtK5RW2J5Zx7LGeh69kEOv5NELOfRKHr2QQ6/k0Qs5dDqPsX76qw+Y0vB+MrC+S7lE\nRGzxxnpRWQ5Mk7S3pG2AY4AlXc4pImKLNaZPf9l+WtKJwCXAOGCh7RvbvNpaTqPVoBfy6IUcoDfy\n6IUcoDfy6IUcoDfy6IUcoIN5jOkL9RER0VvG+umviIjoISkqERFRmxSViIioTYpKxPOApF0kTeh2\nHt2W/dB9KSoxJknaTdJ+kvaVtFsP5LNLF9a5p6TFkjYAVwHLJd1bYlM7nU+3ZD80163PSIrKECT9\n74bhyZIuk/SgpJ9LenkX8umpL1Lo/JeppBmSrgQuB/4F+DRwhaQrJe3XoRw+2jA8XdJtwDWS1kra\nvxM5FN8ALgBeYnua7T8Edge+S9UPXtv1yGek6/sBemZfdP8zYjuvQV7ALxuGzwfeT1WIjwIu62Ae\nM4ArgZuBS8vrlhLbr4N5fLRheDpwG3AHsJaqz7VO5LCy2bqAA4DruvB3cREwpwzPAn7ewd/H6tGM\na+O+6MpnpBf2Q6/si7Lurn5GcqTSupfb/pLt39m+AOjkf+jnACfZ3sf2W8rrj4CTgf/qYB5/3jD8\n6ZLT3sBfAJ/rUA472r5qYND2lcCOHcqh0R62Ly45XA1s38F1XyPpi5L2l7RHee0v6YvAtR3Mo1+3\nPiO9th+gu98XXf2MjOk76jtgsqQzqDqunChpvO3flnHjO5jHoH8kkrrxRQoDvkwlderL9GJJFwHn\nAutKbApwHPCDDuXwUklLqP4uJkvawfbjZVwn/y6OA44HPgZMKvmsA74HnN2hHHrhM9IL+wF6Y19A\nlz8jKSpD+1DD8ApgJ+ABSS+hs32M9cIXKfTAl6ntD0qaQ/XcnP4vkD7gC7aXdiIHNn1mz1ZQXfMC\nzuxQDrh6htCZnVxnE13/jPTIfoAe2BfQ/c9IumkZIwb5I1nSwS9SJB00IHSN7UfLl+nRtr/QqVxi\naJLeavv73c6j27IfOi/XVEZJ0ls7uT7bF9v+G9tH2H5rGe5YQSk5XDHg9WiJ39MLBaU8jG2Lz6F4\nXbcT6PRnZBBd3w/QM/uiI3+fKSqj1yt/rD3xJdYjeTR7aFundTQHSbMkva4MT5f0d5IOs31qJ/MY\nRNc+I5LOBeiR/QA98n1BB/4+c01lGJL+iOdOO5nqIWBLeuiPtRe+SKGDeZTfySTgqv6jpeLXW1gO\npwJzgK0lLQP2p7o3Yb6kfW2f3qE8ZgG2vVzSdGA2cEunPiPlOt/vhYA3StqZKrG3dSKPZiSda/u4\nbn5fSHo9VXP3G2x/qe3ryzWVwUn6MHAs1Q1UfSU8mephYIttL+hWbv0kvcd2J5sVdzUPSR8ETqC6\nZ2cGVbPmC8u4X9pu+81dvZBDWdeqsv5tgd8Ak20/XFriXWX71R3I4dnCBjQWtrcAl3SisEn6JXAT\n8BWqf/wEfJ3qc4rtK9qdQ8mjaXEDflTy6Ehxk3S17Vll+H1Uf6sXAIcA32v791a7b4QZyy+qm/vG\nN4lvQwdvqhomxzu7nUMn8wBWATuV4alUrWxOKu+v3VJyGLiugesFVnZwX4wDdgAeBl5Q4tsD13co\nh62A/0NV1GaU2O2d+j005PFL4KvAwcBB5efdZfigLv1dLAcmluEdgVXtXn9Ofw3td8AebHpKY/cy\nriMkXT/YKKBj3bX0SB7j/FwDgbWSDga+JWkvOncKrhdyAHiqoVn3a/uDkl5I5/4+n7b9DPC4pF/Z\nfhjA9hOSOpKD7d8Bn5P0zfLzHrpzan8mcBLwj8CHbK+U9IQ7dKTUYCtVnWpuRXU2agOA7cckPd3u\nlaeoDO1k4DJJq3nu/pA9gT8ETuxgHrsBhwIPDIgL+PkWlsdvJM2wvRLAVZPmtwILgT/egnIAeIPt\nJ0sOjV/g44G5HcqhFwobALb7gHdIOpzqqKmjeqi4vRC4hupzaUkvsf0bSTvRgX96ck1lGJK2orrI\n1Xh/yPLy31mncjgb+C/bP20y7mu237ml5CFpMtV/x79pMu5A2z/bEnLoFZK27S9sA+K7ArvbXtWF\ntHpCKW4H2j6l27kASNoB2M32HW1dT4pKRETUJfepREREbVJUIiKiNikqETWS9IyklZJukPS9/hvw\nhph+Z0kfaHi/h6RvtT/TiPbINZWIGkl61PZOZXgRcJuHuAFQ1eNuv2/7VZ3JMKK9cqQS0T6/oGo1\niKSdVD1e9peSVknq7z5/AfCycnTzaUlTJd1Q5nm3pO9I+oGk1ZL+pX/Bko6XdJukyyV9WdLnO751\nEU3kPpWINpA0Dngzzz0k6n+Ao1x1o7IrcGXp1mM+8CrbM8p8UwcsagawL/AkcKuk/wCeAf4J2A94\nhKobkOvaukERLUpRiajX9pJWUnXfcg1V1yFQ3eP0KUlvoLopcBKt9UJwme2HACTdBOwF7ApcYXtj\niX8TeHmdGxExWjn9FVGvJ8rTVBCMAAAA2klEQVRRx15UfcSdUOJ/BUwEXlvG3wNs18LyGm8sfIbq\nH8Fe6Zk6YhMpKhFtUI4uPgj8X0njqbrOuNf2byW9karoQHX66g9GuPirgYMkTZC0NfC/6so7YnOl\nqES0ie1rqa51HAP8NzBT0gqqo5ZbyjT3Az8rTZA/3eJy7wI+BVwFXErV7ftD9W9BxMilSXHEGCRp\np9KR5dZUz8pYaPuCbucVkSOViLHptNIg4AbgDuC7Xc4nAsiRSkRE1ChHKhERUZsUlYiIqE2KSkRE\n1CZFJSIiapOiEhERtfn/U4THd07WbZEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1145ce828>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ratings.rating.value_counts().sort_index().plot.bar()\n",
    "plt.title(\"Frequency plot for rating\")\n",
    "plt.xlabel(\"Rating\")\n",
    "plt.ylabel(\"Frequency\")\n",
    "plt.savefig(\"/tmp/rating.jpeg\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>len</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>movieId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.872470</td>\n",
       "      <td>247.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.401869</td>\n",
       "      <td>107.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.161017</td>\n",
       "      <td>59.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.384615</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3.267857</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             mean    len\n",
       "movieId                 \n",
       "1        3.872470  247.0\n",
       "2        3.401869  107.0\n",
       "3        3.161017   59.0\n",
       "4        2.384615   13.0\n",
       "5        3.267857   56.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg = ratings.groupby(\"movieId\").rating.agg([np.mean, len])\n",
    "agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "agg.to_sql(\"rating_avg\", conn, if_exists=\"append\") # Save the dataframe to database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
